Summary Line Charts_all sample_G vs NG
# All samples (G vs NG)
library(ggplot2)
library(ggthemes)
library (gridExtra)
# All sample
df.1<-df.all%>%
filter(Genotype!="Negative")%>%
gather(type,value,-c(No,ID,Genotype,Sex,GenotypeSex,Cage))%>% # convert the original table to long table, create two new columns and put them after -c()
separate(type,c("hr","Treatment"),remove=T) # Seprate type into two columns, e.g. "24hr_NG", separate function can separate the item which has non-alphabet/number symbol into two parts
df.1 <- df.1[-c(3:5)]
# Summarise data
summary<-df.1 %>%
group_by(Treatment,hr) %>%
summarise(value.mean=mean(value), sd=sd(value))%>%
ungroup()
# min.line
min.line<-summary%>%
select(-sd)%>%
mutate(hr = as.numeric(gsub("hr","",gsub("X","",hr))))%>%
spread(Treatment,value.mean)%>%
group_by(hr)%>%
mutate(min_line=min(G,NG))%>%
select(hr,min_line)
summary.final<-summary %>%
mutate(hr.new = as.numeric(gsub("hr","",gsub("X","",hr))))%>%
left_join(min.line,by=c("hr.new"="hr")) %>%
add_column(Group = "Sample")
# Gather negative control data
neg.ctrl<-df.all %>%
filter(Genotype == "Negative")%>%
gather(type,value,-c(No,ID,Genotype,Sex,GenotypeSex,Cage))%>%
separate(type,c("hr","Treatment"),remove=T)%>%
group_by(Genotype,Treatment,hr) %>%
summarise(value.mean=mean(value),sd=sd(value))%>% #Summarize mean and standard deviation for future figure use
ungroup()%>%
mutate(group=paste(Treatment,Genotype,sep=" "))%>%
mutate(hr.new=as.numeric(gsub("hr","",gsub("X","",hr))))%>%
mutate(min_line=0) %>%
select(-c(group)) %>%
rename(Group = Genotype)
# select(Genotype,hr,Treatment,value.mean)%>%
# rename(Genotype=ID)%>%
# mutate(sd=0)#The negative control is under ID column, to bind negative control and my other data, the ID column should be renamed to Genotype
#bind data
summary.final2<-rbind(summary.final,neg.ctrl)
summary.final2<-summary.final2 %>%
mutate(Combine=paste(Treatment,Group,sep=" ")) %>%
mutate(Treatment = ifelse(Combine == "G Negative", "Media control + 2'-FL",
ifelse(Combine == "NG Negative", "Media control",
ifelse(Combine == "G Sample", "mBasal + 2'-FL",
ifelse(Combine == "NG Sample", "mBasal","ditch")))))
calculus<-summary.final %>%
group_by(Treatment) %>%
arrange(Treatment,hr.new)%>% #Sort your data based on ID,Treatment,hr.new
mutate(sum.value=value.mean+lag(value.mean), #sum.value is the summary of the two base value
hr.interval=hr.new-lag(hr.new))%>% # hr.interval is the height of the trapezoid
mutate(area=sum.value*hr.interval/2)%>% #this area is the total area of the highest curve
# summarise(area.total=sum(area,na.rm = T))%>% # Summarize different timepoint area
ungroup() %>%
group_by(Treatment)%>%
summarise(total.area=sum(area,na.rm=T))%>%
ungroup()%>%
mutate(area.difference=lag(total.area)-total.area)%>%
ungroup()%>%
filter(!is.na(area.difference))%>%
select(area.difference)#You can't rejoin the dataset because this is summarised and no id is there.
figure_all <- ggplot(data=subset(summary.final2), aes(x=hr.new,y=value.mean,group=Treatment))+
geom_line(size=1,aes(color=Treatment)) +
geom_point(size=2,aes(color=Treatment,shape=Treatment)) +
geom_pointrange(aes(ymin=value.mean-sd, ymax=value.mean+sd,color=Treatment)) + # error bar method 1 (just line)
theme_bw() +
labs(x="Time (hr)",y=bquote(OD[600]),caption = "",
title = paste0(""))+ # Faeces collected from KO mice, you can use "title = paste0(k,"+ Negative")
theme(legend.position="bottom",
legend.title=element_blank(),
legend.text = element_text(size=14),
axis.text.x=element_text(colour="black", face="plain", size=12),
axis.text.y=element_text(colour="black", face="plain", size=12),
axis.title.x=element_text(margin = margin(t = 5), size=12),
axis.title.y=element_text(margin = margin(r = 5), size=12),
axis.title = element_text(face="plain", size = 10),
plot.title = element_text(size=10, hjust = 0.5),
panel.grid = element_blank(),
panel.border = element_rect(color = "black",
fill = NA,
size = 1),
aspect.ratio = 1,
plot.background = element_rect(fill="transparent", colour = "transparent"))+
scale_y_continuous(limits = c(0,0.6),breaks = seq(0,0.6,0.1))+
scale_x_continuous(limits=c(0,120),breaks=seq(0,120,24))+
scale_colour_manual(values=c("#b86877","#70a1a7","#3B3B3BB2","#868686B2")) +
geom_ribbon(aes(ymax=value.mean, ymin=min_line),fill="#cfcfcf",alpha = 0.2)+ # ivory3, seashell3, gray66, #87c0e6, #ffa0aa, #808080
guides(color=guide_legend(nrow=2,byrow=TRUE))+
annotate("text",x=60,y=0.6,
label=expression(italic("P") < "0.0001"),hjust=0.5, size=4)
figure_all

Calculate area of each sample
calculus2<-df.1 %>%
mutate(hr.new = as.numeric(gsub("hr","",gsub("X","",hr))))%>%
group_by(ID,Treatment) %>%
arrange(ID,Treatment,hr.new) %>%
mutate(sum.value=value+lag(value),
hr.interval=hr.new-lag(hr.new)) %>%
mutate(area=sum.value*hr.interval/2) %>%
ungroup() %>%
group_by(ID,Treatment) %>%
summarise(total.area=sum(area,na.rm=T)) %>%
ungroup() %>%
spread(Treatment,total.area)
set.seed(123)
# Choosing a normality test
## Answer: D'Agostino-Pearson normality test ("omnibus K2" test)
## Reasons: It first computes the skewness and kurtosis to quantify how far the distribution is from Gaussian in terms of asymmetry and shape. It then calculates how far each of these values differs from the value expected with a Gaussian distribution, and computes a single P value from the sum of these discrepancies. It is a versatile and powerful normality test. (https://www.graphpad.com/guides/prism/latest/statistics/stat_choosing_a_normality_test.htm)
# D'Agostino-Pearson normality test ("omnibus K2" test)
##Null (H0): The data are normally distributed Alternate/the data are sampled from a Gaussian distribution
##The low p-value (<0.05) indicates you reject that null hypothesis and so accept the alternative hypothesis that the data are not sampled from a Gaussian population
## https://www.rdocumentation.org/packages/PoweR/versions/1.0.7/topics/statcompute
# Example
library(PoweR)
getindex() # index 6 is D'Agostino-Pearson normality test ("omnibus K2" test)
## Index Law Nbparams Default1 Default2 Default3
## 1 1 Laplace(mu,b) 2 0.000000 1 NA
## 2 2 Normal(mu,sigma) 2 0.000000 1 NA
## 3 3 Cauchy(mu,sigma) 2 0.000000 1 NA
## 4 4 Logistic(mu,sigma) 2 0.000000 1 NA
## 5 5 Gamma(shape,rate) 2 2.000000 1 NA
## 6 6 Beta(a,b) 2 1.000000 1 NA
## 7 7 Uniform(a,b) 2 0.000000 1 NA
## 8 8 Student-t(df) 1 1.000000 NA NA
## 9 9 Chi-squared(df) 1 1.000000 NA NA
## 10 10 Lognormal(logmean,logsd) 2 0.000000 1 NA
## 11 11 Weibull(shape,scale) 2 1.000000 1 NA
## 12 12 ShiftedExp(l,rate) 2 0.000000 1 NA
## 13 13 U^{j+1} 1 1.000000 NA NA
## 14 14 AveUnif(k,a,b) 3 2.000000 0 1.0
## 15 15 UUnif(j) 1 1.000000 NA NA
## 16 16 VUnif(j) 1 1.000000 NA NA
## 17 17 JSU(mu,sigma,nu,tau) 4 0.000000 1 0.0
## 18 18 Tukey(l) 1 1.000000 NA NA
## 19 19 LoConN(p,m) 2 0.200000 3 NA
## 20 20 JSB(g,d) 2 0.000000 1 NA
## 21 21 SkewN(xi,omega,alpha) 3 0.000000 1 0.0
## 22 22 ScConN(p,d) 2 0.200000 2 NA
## 23 23 GP(mu,sigma,xi) 3 0.000000 1 0.0
## 24 24 GED(mu,sigma,p) 3 0.000000 1 1.0
## 25 25 Stable(alpha,beta,c,mu) 4 1.000000 0 1.0
## 26 26 Gumbel(mu,sigma) 2 1.000000 1 NA
## 27 27 Frechet(mu,sigma,alpha) 3 0.000000 1 1.0
## 28 28 GEV(mu,sigma,xi) 3 0.000000 1 0.0
## 29 29 GArcSine(alpha) 1 0.500000 NA NA
## 30 30 FoldN(mu,sigma) 2 0.000000 1 NA
## 31 31 MixN(p,m,d) 3 0.500000 0 1.0
## 32 32 TruncN(a,b) 2 0.000000 1 NA
## 33 33 Nout(a) 1 1.000000 NA NA
## 34 34 GEP(t1,t2,t3,crit) 4 0.500000 0 1.0
## 35 35 Exponential(lambda) 1 1.000000 NA NA
## 36 36 ALaplace(mu,b,k) 3 0.000000 1 2.0
## 37 37 NIG(alpha,beta,delta,mu) 4 1.000000 0 1.0
## 38 38 APD(theta,phi,alpha,lambda) 4 0.000000 1 0.5
## 39 39 modAPD(mu,sigma,theta1,theta2) 4 0.000000 1 0.5
## 40 40 LPtn(alpha,mu,sigma) 3 1.959964 0 1.0
## Default4
## 1 NA
## 2 NA
## 3 NA
## 4 NA
## 5 NA
## 6 NA
## 7 NA
## 8 NA
## 9 NA
## 10 NA
## 11 NA
## 12 NA
## 13 NA
## 14 NA
## 15 NA
## 16 NA
## 17 5e-01
## 18 NA
## 19 NA
## 20 NA
## 21 NA
## 22 NA
## 23 NA
## 24 NA
## 25 0e+00
## 26 NA
## 27 NA
## 28 NA
## 29 NA
## 30 NA
## 31 NA
## 32 NA
## 33 NA
## 34 1e-06
## 35 NA
## 36 NA
## 37 0e+00
## 38 2e+00
## 39 2e+00
## 40 NA
## Index Stat Alter Nbparams
## 1 1 K-S 3 0
## 2 2 AD^* 3 0
## 3 3 Z_C 3 0
## 4 4 Z_A 3 0
## 5 5 P_S 3 0
## 6 6 K^2 3 0
## 7 7 JB 3 0
## 8 8 DH 3 0
## 9 9 RJB 3 0
## 10 10 T_{Lmom} 3 0
## 11 11 T_{Lmom}^{(1)} 3 0
## 12 12 T_{Lmom}^{(2)} 3 0
## 13 13 T_{Lmom}^{(3)} 3 0
## 14 14 BM_{3-4} 3 0
## 15 15 BM_{3-6} 3 0
## 16 16 T_{MC-LR} 3 0
## 17 17 T_w 0,1,2 0
## 18 18 T_{MC-LR}-T_w 3 0
## 19 19 T_{S,5} 3 0
## 20 20 T_{K,5} 3 0
## 21 21 W 4 0
## 22 22 W' 4 0
## 23 23 tilde{W} 4 0
## 24 24 D 0,1,2 0
## 25 25 r 4 0
## 26 26 CS 3 0
## 27 27 Q 0,1,2 0
## 28 28 Q-Q* 0,1,2 0
## 29 29 BCMR 3 0
## 30 30 beta_3^2 3 0
## 31 31 T^*(alpha) 1 1
## 32 32 I_n 3 0
## 33 33 R_{sJ} 3 0
## 34 34 Q* 0,1,2 0
## 35 35 R_n 3 0
## 36 36 X_{APD} 3 0
## 37 37 Z_{EPD} 0,1,2 0
## 38 38 GLB 3 0
## 39 39 V_3-ML 0,1,2 0
## 40 40 V_4-ML 0,1,2 0
## 41 41 S 3 0
## 42 42 A^2 3 0
## 43 43 W^2 3 0
## 44 44 U^2 3 0
## 45 45 sqrt{n}D 3 0
## 46 46 V 3 0
## 47 47 T_{n,a}^{(1)}-MO 3 1
## 48 48 T_{n,a}^{(1)}-ML 3 1
## 49 49 T_{n,a}^{(2)}-MO 3 1
## 50 50 T_{n,a}^{(2)}-ML 3 1
## 51 51 T_{m,n}^{V} 4 1
## 52 52 T_{m,n}^{E} 4 1
## 53 53 T_{m,n}^{C} 4 1
## 54 54 hat{G}_n 3 0
## 55 55 V_3 0,1,2 0
## 56 56 V_4 0,1,2 0
## 57 57 K_1 3 0
## 58 58 T 3 0
## 59 59 Z 3 0
## 60 60 K 3 0
## 61 61 DLLap1 0,1,2 0
## 62 62 DLLap2 0,1,2 0
## 63 63 D_n 3 0
## 64 64 W_{n}^{2} 3 0
## 65 65 A_{n}^{2} 3 0
## 66 66 C_n 3 0
## 67 67 K_n 3 0
## 68 68 T_1 3 0
## 69 69 T_2 3 0
## 70 70 G(n) 3 0
## 71 71 Q 3 0
## 72 72 2nI^{lambda} 3 1
## 73 73 M(n) 3 0
## 74 74 L_{n}^{(m)} 4 1
## 75 75 S_{n}^{(m)} 3 1
## 76 76 H(m,n) 4 1
## 77 77 A^{*}(n) 3 0
## 78 78 D_{n,m}(phi_lambda) 3 2
## 79 79 E_{m,n} 3 1
## 80 80 T_{n,m}^{lambda} 3 2
## 81 81 Z_A 3 0
## 82 82 Z_C 3 0
## 83 83 t test 0,1,2 1
## 84 85 DLLap3 3 0
## 85 86 T(alpha_1,alpha_2) 3 1
## 86 87 T_n 3 0
## 87 88 T^{LS}(alpha) 3 1
## 88 89 T^{LS}_3(mu,alpha) 3 2
## 89 90 T^{LS}_4(alpha,mu) 3 2
## 90 91 GV_1 0,1,2 0
## 91 92 GV_2 0,1,2 0
## 92 93 Ho_K 0,1,2 0
## 93 94 Ho_U 0,1,2 0
## 94 95 Ho_V 0,1,2 0
## 95 96 Ho_W 0,1,2 0
## 96 97 SR^* 0,1,2 0
statcompute(6,calculus2$G, levels = c(0.05), critvalL = NULL, critvalR = NULL,
alter = 0, stat.pars = NULL)
## $statistic
## [1] 10.62906
##
## $pvalue
## [1] 0
##
## $decision
## [1] 1
##
## $alter
## [1] 3
##
## $stat.pars
## [1] NA
##
## $symbol
## [1] "K^2"
statcompute(6,calculus2$NG, levels = c(0.05), critvalL = NULL, critvalR = NULL,
alter = 0, stat.pars = NULL)
## $statistic
## [1] 1.29134
##
## $pvalue
## [1] 0
##
## $decision
## [1] 1
##
## $alter
## [1] 3
##
## $stat.pars
## [1] NA
##
## $symbol
## [1] "K^2"
calculus3 <- calculus2 %>%
gather("Treatment", "Area",-ID) %>%
mutate(Treatment = ifelse(Treatment == "G", "mBasal + 2'-FL", "mBasal"))
## Summary
Summary.area <- calculus3 %>%
group_by(Treatment) %>%
summarise(
count = n(),
median= median (Area, na.rm=TRUE),
IQR = IQR (Area,na.rm=TRUE),
IQR25 = quantile(Area, 0.25),
IQR75 = quantile(Area, 0.75)
)
Summary.area
## # A tibble: 2 × 6
## Treatment count median IQR IQR25 IQR75
## <chr> <int> <dbl> <dbl> <dbl> <dbl>
## 1 mBasal 32 15.1 2.20 14.3 16.5
## 2 mBasal + 2'-FL 32 35.6 5.46 33.3 38.7
## Area difference between mBasal and mBasal + 2'-FL
stats <- wilcox.test(Area ~ Treatment, data=calculus3,
paired=FALSE, correct=TRUE, conf.int=TRUE, conf.level=0.95, exact=FALSE)
stats
##
## Wilcoxon rank sum test with continuity correction
##
## data: Area by Treatment
## W = 0, p-value = 6.508e-12
## alternative hypothesis: true location shift is not equal to 0
## 95 percent confidence interval:
## -21.86401 -18.52206
## sample estimates:
## difference in location
## -20.16002
# Effect size
library (rstatix)
set.seed(123)
effectsize <- calculus3 %>% wilcox_effsize(Area ~ Treatment)
effectsize
## # A tibble: 1 × 7
## .y. group1 group2 effsize n1 n2 magnitude
## * <chr> <chr> <chr> <dbl> <int> <int> <ord>
## 1 Area mBasal mBasal + 2'-FL 0.859 32 32 large
# Method 2_ggpubr_to add it in the figure
## P value
library(ggpubr)
set.seed(123)
compare_means(Area ~ Treatment, data=calculus3,method = "wilcox.test", paired=TRUE, group.by = NULL, ref.group=NULL)
## # A tibble: 1 × 8
## .y. group1 group2 p p.adj p.format p.signif method
## <chr> <chr> <chr> <dbl> <dbl> <chr> <chr> <chr>
## 1 Area mBasal + 2'-FL mBasal 4.66e-10 0.00000000047 4.7e-10 **** Wilcoxon
level_order_Treatment <- c("mBasal","mBasal + 2'-FL")
# Visualization
# install.packages("hrbrthemes")
# library(hrbrthemes)
library(viridis)
figure.area <- ggplot(data=calculus3, mapping = aes(y=Area, x=factor(Treatment,level=level_order_Treatment)))+
geom_boxplot(outlier.shape = NA)+
theme_bw()+
geom_jitter(aes(colour=Treatment),
size=2,alpha=0.9,
position=position_jitterdodge(jitter.width = 0.5,
jitter.height = 0,
dodge.width = 0.8))+
labs(x="",y="Area size",caption = "",
title = "")+
theme(legend.position="none",
axis.title = element_text(face="plain", size = 10),
axis.title.x=element_text(margin = margin(t = 5)),
axis.title.y=element_text(margin = margin(r = 5), size=16),
axis.text.x=element_text(colour="black", face="plain", size=14),
axis.text.y=element_text(colour="black", face="plain", size=14),
plot.title = element_text(size=10, hjust = 0.5),
panel.grid = element_blank(),
panel.border = element_rect(color = "black",
fill = NA,
size = 1),
aspect.ratio = 1,
plot.background = element_rect(fill = "transparent", colour = "transparent"))+
scale_y_continuous(limits = c(0,50),breaks = seq(0,50,10))+
scale_colour_manual(values=c("#b86877","#70a1a7"))
figure.area

#
# annotate("text",x=0,y=0.6,
# label=paste0("Total Area = ",round(as.vector(subset(calculus,Genotype==k,select=area.difference)),2)),
# hjust=0,size=3)
#
figure.area.final <- figure.area +
annotate("text",x=1.515,y=50,
label=expression(italic("P") < "0.0001"),hjust=0.5, size=4.5)
figure.area.final
